#!/usr/bin/env bash
#
# Submits the TensorFlowOnSpark MNIST data-setup script to YARN.
#
# Required environment (checked below):
#   SPARK_HOME  - Spark installation; ${SPARK_HOME}/bin/spark-submit is run.
#   TFoS_HOME   - checkout containing examples/mnist/mnist_data_setup.py.
#   USER        - used to build the hdfs:///user/${USER}/tf/... archive URIs.
#
# Also read (not validated; only re-exported under other names):
#   HADOOP_HOME, JAVA_HOME
#
# The exports below unconditionally overwrite any values already present in
# the caller's environment.

# Fail early with a clear message instead of running "/bin/spark-submit" or
# building "hdfs:///user//tf/..." from empty values.
: "${SPARK_HOME:?SPARK_HOME must be set to the Spark installation directory}"
: "${TFoS_HOME:?TFoS_HOME must be set to the TensorFlowOnSpark directory}"
: "${USER:?USER must be set (used to build the HDFS archive paths)}"

# Python interpreter used by both the PySpark driver and the executors.
export PYTHON_ROOT=/usr/local/python2.7
export PYSPARK_DRIVER_PYTHON="${PYTHON_ROOT}/bin/python2.7"
export PYSPARK_PYTHON="${PYTHON_ROOT}/bin/python2.7"

# NOTE(review): LD_LIBRARY_PATH is set to the value PATH had *before* the
# Python bin directory is prepended below. This is preserved from the original
# script; confirm whether ${LIB_JVM}:${LIB_HDFS} was intended instead.
export LD_LIBRARY_PATH="${PATH}"

export SPARK_YARN_USER_ENV="PYSPARK_PYTHON=${PYSPARK_PYTHON}"
export PATH="${PYTHON_ROOT}/bin/:${PATH}"
export QUEUE=default

# Derived Hadoop/JVM locations. HADOOP_HOME / JAVA_HOME are expanded as-is;
# if they are unset these become "/lib/native" and "/jre/lib/amd64/server".
export LIB_HDFS="${HADOOP_HOME}/lib/native"
export LIB_JVM="${JAVA_HOME}/jre/lib/amd64/server"
export HADOOP_PREFIX="${HADOOP_HOME}"

# Run mnist_data_setup.py in YARN client mode with two 2G executors, shipping
# python.zip and mnist.zip from HDFS as archives aliased "python" and "mnist".
"${SPARK_HOME}/bin/spark-submit" \
  --master yarn \
  --deploy-mode client \
  --queue "${QUEUE}" \
  --num-executors 2 \
  --executor-memory 2G \
  --archives "hdfs:///user/${USER}/tf/python.zip#python,hdfs:///user/${USER}/tf/mnist.zip#mnist" \
  "${TFoS_HOME}/examples/mnist/mnist_data_setup.py" \
  --output mnist/csv \
  --format csv

